Purpose

This document creates plots of the network of Jaccard similarity indices for some of the exemplars rated as most self-similar. It builds on the exploratory work contained in graph-network-visualizations.Rmd.

Set-up

Import data

Jaccard indices

The Jaccard index data are found in analysis/data/jaccard.csv.

jaccard_raw <- readr::read_csv("analysis/data/jaccard.csv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Exemplar.Row = col_double(),
##   Exemplar.Col = col_double(),
##   Jaccard = col_double(),
##   Group = col_character()
## )
str(jaccard_raw)
## spec_tbl_df [950 Ă— 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Exemplar.Row: num [1:950] 1 1 1 1 1 1 1 1 1 1 ...
##  $ Exemplar.Col: num [1:950] 2 2 2 2 2 3 3 3 3 3 ...
##  $ Jaccard     : num [1:950] 0.0476 0.1186 0.1228 0.2 0.2692 ...
##  $ Group       : chr [1:950] "P31M" "P3M1" "P6M" "P6" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Exemplar.Row = col_double(),
##   ..   Exemplar.Col = col_double(),
##   ..   Jaccard = col_double(),
##   ..   Group = col_character()
##   .. )

It’s probably wise to reorder the data frame by wallpaper group, Jaccard index, and exemplar index.

jaccard <- jaccard_raw %>%
  dplyr::arrange(., Group, Exemplar.Row, desc(Jaccard))

Let’s add a Jaccard mean and median by Exemplar.Row.

jaccard_aug <- jaccard %>%
  dplyr::group_by(., Group, Exemplar.Row) %>%
  dplyr::mutate(.,
    j_mean = mean(Jaccard),
    j_med = median(Jaccard),
    j_max = max(Jaccard),
    j_min = min(Jaccard)
  )

Plots

Outliers and their connections

For each wallpaper group, pick the exemplar pair with the most extreme (highest) Jaccard value. Then plot the set of Jaccard indices for both members of the pair.

Create helper function to pick most extreme pair.

pick_n_pairs_max_jaccard <- function(wp_group = "P1", df = jaccard, n_pairs = 1) {
  this_df <- df %>%
    dplyr::filter(., Group == wp_group) %>%
    dplyr::arrange(., desc(Jaccard))
  
  this_df[1:n_pairs,]
}

Now, do this for all of the wallpaper groups.

wp_groups <- c("P1", "P31M", "P3M1", "P6", "P6M")

exemplars_max_jaccard <- purrr::map_df(wp_groups, pick_n_pairs_max_jaccard)

exemplars_max_jaccard
## # A tibble: 5 x 4
##   Exemplar.Row Exemplar.Col Jaccard Group
##          <dbl>        <dbl>   <dbl> <chr>
## 1            8            9   0.435 P1   
## 2            2            7   0.65  P31M 
## 3           19           20   0.404 P3M1 
## 4            6           13   0.558 P6   
## 5           10           20   0.383 P6M

Heatmaps

Let’s create a simplified color scale.

value_breaks <- c(0, .2, .4, .6, .8)
value_colors <-
    colorRampPalette(RColorBrewer::brewer.pal(4, "Oranges"))(4)
legend_text <- c("<.2", ".2-.4", ".4-.6", ">.6")

Create helper function to generate heatmaps.

plot_heatmap <- function(wp_group = "P1",
                         df = jaccard,
                         show_legend = FALSE) {
  # Select wp_group
  this_df <- df %>%
    dplyr::filter(., Group == wp_group)
  
  # Turn Jaccard data into matrix
  j_matrix <- matrix(nrow = 20, ncol = 20)
  for (r in 1:190) {
    j_matrix[this_df$Exemplar.Row[r], this_df$Exemplar.Col[r]] <-
      this_df$Jaccard[r]
  }
  
  title_txt <- paste0(
    wp_group,
    ": max= ",
    format(max(this_df$Jaccard), digits = 2, nsmall = 2),
    " | mean= ",
    format(
      mean(this_df$Jaccard),
      digits = 2,
      nsmall = 2
    )
  )
  
  # value_breaks <- c(0, .2, .4, .6, .8)
  # value_colors <-
  #   colorRampPalette(RColorBrewer::brewer.pal(4, "Oranges"))(4)
  
  heatmap(
    j_matrix,
    Rowv = NA,
    Colv = NA,
    main = title_txt,
    symm = TRUE,
    col = value_colors,
    breaks = value_breaks
  )
  
  if (show_legend) {
    legend(
      x = "bottomright",
      legend = legend_text,
      fill = value_colors
    )
  }
  
  # if (save_to_file) {
  #   png(paste0("img/", wp_group, "-", "jaccard-heatmap.png"))
  #   heatmap(
  #     j_matrix,
  #     Rowv = NA,
  #     Colv = NA,
  #     main = title_txt,
  #     symm = TRUE,
  #     col = value_colors,
  #     breaks = value_breaks
  #   )
  #   
  #   if (show_legend) {
  #     legend(
  #       x = "bottomright",
  #       legend = c("<.2", ".2-.4", ".4-.6", ">.6"),
  #       fill = colorRampPalette(RColorBrewer::brewer.pal(4, "Oranges"))(4)
  #     )
  #   }
  #   dev.off()
  #}
}

Test the function with default values.

plot_heatmap("P1")

Now, let’s plot the same for all wallpaper groups.

plot_heatmap("P31M")

plot_heatmap("P3M1")

plot_heatmap("P6")

plot_heatmap("P6M")

These are saved in img/.

Connectivity networks

Now, for each member of the most similar exemplar pair, we show the connectivity network.

Create helper function.

make_jaccard_network <- function(wp_group = "P1", df = jaccard) {
  this_df <- df %>%
    dplyr::filter(., Group == wp_group) %>%
    dplyr::arrange(., Exemplar.Row, Exemplar.Col)
  
  this_edges <- tibble(
    from = this_df$Exemplar.Row,
    to = this_df$Exemplar.Col,
    weight = this_df$Jaccard
  )
  
  this_nodes <- tibble::tibble(id = 1:20)
  
  tidygraph::tbl_graph(nodes = this_nodes,
                       edges = this_edges,
                       directed = FALSE)
  
}

Test with default parameters.

(p1_df <- make_jaccard_network())
## # A tbl_graph: 20 nodes and 190 edges
## #
## # An undirected simple graph with 1 component
## #
## # Node Data: 20 x 1 (active)
##      id
##   <int>
## 1     1
## 2     2
## 3     3
## 4     4
## 5     5
## 6     6
## # … with 14 more rows
## #
## # Edge Data: 190 x 3
##    from    to weight
##   <int> <int>  <dbl>
## 1     1     2  0.269
## 2     1     3  0.179
## 3     1     4  0.119
## # … with 187 more rows

Select a specific exemplar and categorize the Jaccard index values.

select_exemplar <- function(network_df =  make_jaccard_network(), exemplar_id = 8) {
  df <- network_df %>%
    activate(edges) %>%
    dplyr::filter(., from == exemplar_id | to == exemplar_id) %>%
    dplyr::mutate(weight = cut(weight, c(0, .2, .4, .6, .8), 
                               labels = c("<.2", ".2-.4", ".4-.6", ">.6")))
  df
}

select_exemplar()
## # A tbl_graph: 20 nodes and 19 edges
## #
## # An unrooted tree
## #
## # Edge Data: 19 x 3 (active)
##    from    to weight
##   <int> <int> <fct> 
## 1     1     8 <.2   
## 2     2     8 .2-.4 
## 3     3     8 .2-.4 
## 4     4     8 .2-.4 
## 5     5     8 <.2   
## 6     6     8 <.2   
## # … with 13 more rows
## #
## # Node Data: 20 x 1
##      id
##   <int>
## 1     1
## 2     2
## 3     3
## # … with 17 more rows

Now, plot the edge values.

ggraph(select_exemplar(), layout = "linear", circular = TRUE) +
  geom_edge_arc(aes(color = weight)) +
  geom_node_text(aes(label = id), size = 6) +
  theme_graph() +
  coord_fixed() +
  # NOTE: the drop = FALSE ensures that the full range of scales is used!
  scale_edge_color_manual(name = "Jaccard",
                          values = value_colors,
                          drop = FALSE) +
  theme(legend.text = element_text(size = 14)) +
  theme(legend.title = element_text(size = 16))

plot_jaccard_vals <-
  function(network_df = make_jaccard_network(),
           exemplar_id = 8,
           wp_group = "P1") {

    df <- select_exemplar(network_df, exemplar_id)
    
    ggraph(df, layout = "linear", circular = TRUE) +
      geom_edge_arc(aes(color = weight)) +
      geom_node_text(aes(label = id), size = 6) +
      theme_graph() +
      coord_fixed() +
      # NOTE: the drop = FALSE ensures that the full range of scales is used!
      scale_edge_color_manual(name = "Jaccard",
                              values = value_colors,
                              drop = FALSE) +
      theme(legend.text = element_text(size = 14)) +
      theme(legend.title = element_text(size = 16))
  }

Test with default parameters.

P1 #8

plot_jaccard_vals()

P1 #9

And its companion.

plot_jaccard_vals(exemplar_id = 9)

P31M #2

plot_jaccard_vals(make_jaccard_network(wp_group = "P31M", df = jaccard), exemplar_id = 2, wp_group = "P31M")

P31M #7

plot_jaccard_vals(make_jaccard_network(wp_group = "P31M", df = jaccard), exemplar_id = 7, wp_group = "P31M")

P3M1 #19

plot_jaccard_vals(make_jaccard_network(wp_group = "P3M1", df = jaccard), exemplar_id = 19, wp_group = "P3M1")

P3M1 #20

plot_jaccard_vals(make_jaccard_network(wp_group = "P3M1", df = jaccard), exemplar_id = 20, wp_group = "P3M1")

P6 #6

plot_jaccard_vals(make_jaccard_network(wp_group = "P6", df = jaccard), exemplar_id = 6, wp_group = "P6")

P6 #13

plot_jaccard_vals(make_jaccard_network(wp_group = "P6", df = jaccard), exemplar_id = 13, wp_group = "P6")

P6M #10

plot_jaccard_vals(make_jaccard_network(wp_group = "P6M", df = jaccard), exemplar_id = 10, wp_group = "P6M")

P6M #20

plot_jaccard_vals(make_jaccard_network(wp_group = "P6M", df = jaccard), exemplar_id = 20, wp_group = "P6M")

Now, let’s put the pieces together. Not used at this time

graph_network_for_max_pairs <- function(wp_group = "P1", df = jaccard,
                                        save_to_file = FALSE) {
  
  this_network <- make_jaccard_network(wp_group, df)
  
  this_pair <- pick_n_pairs_max_jaccard(wp_group, df)
  
  p1 <- plot_jaccard_vals(this_network,
                    this_pair$Exemplar.Row,
                    wp_group
                    )
  
  p2 <- plot_jaccard_vals(this_network,
                    this_pair$Exemplar.Col,
                    wp_group
                    ) 

  ggpubr::ggarrange(p1, p2, ncol = 2, nrow = 1,
                    labels = c(paste0(wp_group, " #", this_pair$Exemplar.Row), paste0(wp_group, " #", this_pair$Exemplar.Col)),
                    common.legend = TRUE,
                    legend = "bottom")
}

Test with default values

graph_network_for_max_pairs()